# This script is called from voteredo2022_build_analysis.R #

#### DATA ####

dat <- read.csv(file = "../input/cces_2020_LVweighted.csv")

#### Inconsistent dataprep ####
# Two 0/1 flags, one per item pair. A respondent is flagged when BOTH items
# of the pair are coded 1; rows where the comparison is NA stay at 0.
dat$inconsistent <- 0
dat$inconsistent[which(dat$CC20_334c == 1 & dat$CC20_334d == 1)] <- 1

dat$inconsistenta <- 0
dat$inconsistenta[which(dat$CC20_332a == 1 & dat$CC20_332f == 1)] <- 1

# Label each respondent by how many of the two flags are set (0, 1 or 2).
# Both flags are always 0 or 1, so their sum + 1 indexes the label vector.
dat$type <- c("No contradictions", "1 contradiction", "2 contradictions")[
  dat$inconsistent + dat$inconsistenta + 1
]


# Create a variable identifying partisans (and leaners)
# `party` is filled in by subset-assignment only: pid7 below 4 is labelled
# "Democrats", pid7 strictly between 4 and 8 is labelled "Republicans".
# Rows matching neither condition (pid7 equal to 4, pid7 of 8 or more, or
# missing pid7) receive no label and are left as NA.
# NOTE(review): this assumes the input file has no pre-existing `party`
# column; if it does, unlabelled rows keep their old values -- confirm.
dat$party[dat$pid7<4] <- "Democrats"
dat$party[dat$pid7>4 & dat$pid7<8] <- "Republicans"
# Store the labels as a factor (unlabelled rows remain NA).
dat$party <- as.factor(dat$party)

# Survey duration: end timestamp minus start timestamp, stored as a difftime.
# Subtracting POSIXct values lets R choose the difftime unit automatically
# (secs, mins, hours or days) based on the data, so the unit is requested
# explicitly when converting to a plain number. Without `units = "mins"` the
# `surveytime_min` column would only be in minutes when R happened to pick
# "mins" as the automatic unit.
# NOTE(review): timestamps are parsed in the session's local time zone --
# confirm that matches how starttime/endtime were recorded.
dat$surveytime <- as.POSIXct(dat$endtime) - as.POSIXct(dat$starttime)
dat$surveytime_min <- as.numeric(dat$surveytime, units = "mins")


# Split the presidential preference (prez20) into two indicators, one per
# major candidate: 1 = prefers that candidate, 0 = gave one of the other
# listed answers. Any other value of prez20 (including missing) stays NA.
dat$Biden <- NA
dat$Biden[dat$prez20 %in% "Biden"] <- 1
dat$Biden[dat$prez20 %in% c("Trump", "Other", "Not sure")] <- 0

dat$Trump <- NA
dat$Trump[dat$prez20 %in% "Trump"] <- 1
dat$Trump[dat$prez20 %in% c("Biden", "Other", "Not sure")] <- 0

# Recode House vote preference into two indicators: one for whether or not the
# respondent is voting for a House Democrat and one for whether or not the
# respondent is voting for a House Republican.
#
# recode_house_vote(df, party_label, slots)
#   df          data frame holding CC20_367, CC20_367_voted and the
#               HouseCand<k>Party columns.
#   party_label party string to look for in HouseCand<k>Party
#               ("Democratic" or "Republican").
#   slots       candidate slot numbers to check (default: all six).
#   Returns a numeric vector with one entry per row of df:
#     1  = the chosen slot (in either vote item) belongs to `party_label`,
#     0  = otherwise (including rows where both vote items are missing),
#     NA = either vote item is coded 99.
#
# Every candidate slot is checked for both parties. Checking only a
# hand-picked subset of slots per party silently codes a respondent as 0
# whenever their chosen candidate of that party sits in an unlisted slot.
recode_house_vote <- function(df, party_label, slots = 1:6) {
  voted_for <- rep(0, nrow(df))
  for (slot in slots) {
    # A slot whose party column is absent yields an empty mask (no-op).
    is_party <- df[[paste0("HouseCand", slot, "Party")]] %in% party_label
    # %in% treats NA as "no match", so a missing answer on one vote item
    # does not block a match on the other.
    for (vote_col in c("CC20_367_voted", "CC20_367")) {
      voted_for[df[[vote_col]] %in% slot & is_party] <- 1
    }
  }
  # Code 99 on either item overrides everything and is set to missing.
  voted_for[df$CC20_367 %in% 99 | df$CC20_367_voted %in% 99] <- NA
  voted_for
}

dat$HouseDemocrat <- recode_house_vote(dat, "Democratic")

dat$HouseRepublican <- recode_house_vote(dat, "Republican")


#### Reweight without contradictors ####
# Likely-voter weight for the full sample: national weight scaled by Voted.
dat$lvweight <- dat$Voted * dat$nationalweight

# Analysis sample restricted to respondents with no flagged contradictions.
dat2 <- dat[dat$type %in% "No contradictions", , drop = FALSE]

# Population targets from the 2019 ACS, one vector of shares per raking
# variable.
# NOTE(review): as written, weduc sums to 0.99 and region to 1.01
# (presumably rounding) -- confirm the raking routine is expected to absorb
# this.
gender <- c(0.49, 0.51)
agecat <- c(0.21, 0.17, 0.16, 0.17, 0.15, 0.14)
race5  <- c(0.63, 0.12, 0.16, 0.06, 0.03)
educ   <- c(0.12, 0.28, 0.22, 0.08, 0.19, 0.11)
weduc  <- c(0.22, 0.20, 0.13, 0.08, 0.17, 0.11, 0.05, 0.03)
region <- c(0.18, 0.21, 0.38, 0.24)
# 2016 presidential vote shares. The original note says "only if needed",
# but this vector IS part of the target list passed to the raking call below.
prez16 <- c(.339, .336, .053, .272)

# Named list of targets; each name is the raking variable it belongs to.
targets <- list(
  gender = gender,
  agecat = agecat,
  race5  = race5,
  educ   = educ,
  weduc  = weduc,
  prez16 = prez16,
  region = region
)

# Rake the restricted sample to the targets. `cap` and `type` are passed
# straight through to anesrake(); see its documentation for their effect.
outsave <- anesrake(
  targets, dat2,
  caseid = dat2$caseid,
  cap = 8,
  type = "nolim"
)

# The first element of the anesrake result is stored as the new national
# weight (presumably the raked weight vector -- confirm against anesrake docs).
dat2$nationalweight2 <- unlist(outsave[1])

# Likely-voter version of the new weight, built the same way as lvweight.
dat2$lvweight2 <- dat2$Voted * dat2$nationalweight2


#### Appendix Tables B1 and B2 (2020 component): ####
# Each combined table has two rows:
#   row 1 = full sample (dat), weighted by lvweight
#   row 2 = no-contradiction sample (dat2), weighted by lvweight2
# Cells are weighted percentages rounded to one decimal place.
# NOTE(review): the four status labels below are printed, but the objects
# computed after them are only assigned, and combined_20 is never printed
# here (only the presidential table is) -- confirm that is intended.

# House vote
print("Reweighted house vote, 2020")
noc_20 <- summarise(
  dat2,
  Democrat = round(weighted.mean(100 * HouseDemocrat, lvweight2, na.rm = TRUE), 1),
  Republican = round(weighted.mean(100 * HouseRepublican, lvweight2, na.rm = TRUE), 1)
)
print("Original house vote, 2020")
full_20 <- summarise(
  dat,
  Democrat = round(weighted.mean(100 * HouseDemocrat, lvweight, na.rm = TRUE), 1),
  Republican = round(weighted.mean(100 * HouseRepublican, lvweight, na.rm = TRUE), 1)
)
combined_20 <- rbind(full_20, noc_20)


# Presidential vote
print("Reweighted trump vote, 2020")
noc_20_trump <- summarise(
  dat2,
  Biden = round(weighted.mean(100 * Biden, lvweight2, na.rm = TRUE), 1),
  Trump = round(weighted.mean(100 * Trump, lvweight2, na.rm = TRUE), 1)
)
print("Original trump vote, 2020")
full_20_trump <- summarise(
  dat,
  Biden = round(weighted.mean(100 * Biden, lvweight, na.rm = TRUE), 1),
  Trump = round(weighted.mean(100 * Trump, lvweight, na.rm = TRUE), 1)
)
combined_20_trump <- rbind(full_20_trump, noc_20_trump)
print("2020 Presidential:")
print(combined_20_trump)

# Drop the two working data frames; the summary tables stay in the workspace.
rm(dat, dat2)